Loading Packages
library(tidyverse)
## Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
## when loading 'dplyr'
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gtrendsR)
Loading and cleaning Google Trends data
## Name order data
name_order = read.csv("./data/name_order.csv", skip = 2, col.names = c("state", "biden_trump", "trump_biden")) %>%
mutate_all(funs(str_replace(., "%", ""))) %>%
mutate(
biden_trump = as.numeric(biden_trump),
trump_biden = as.numeric(trump_biden)
)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Today's searches for "voting"
#voting = read.csv("./data/voting.csv", skip = 2, col.names = c("state", "vote_voting"))
voting = read.csv("./data/voting_121920.csv", skip = 2, col.names = c("state", "vote_voting"))
## Change in voting searches since last election
voting_percentages = read.csv("./data/state_names.csv")
voting_percentages$state_code = paste0('US-', voting_percentages$state_abbreviation)
voting_percentages =
voting_percentages %>%
mutate(
data = map(state_code, ~(gtrends(keyword = "vote + voting", geo = .x, time = '2016-09-30 2020-10-31')$interest_over_time))
)
for (i in 1:51) {
voting_percentages$data[[i]]$hits = str_replace(voting_percentages$data[[i]]$hits, "<1", "0")
voting_percentages$data[[i]]$hits = as.numeric(voting_percentages$data[[i]]$hits)
}
voting_percentages =
voting_percentages %>%
unnest(cols = data) %>%
select(state, state_abbreviation, date, hits) %>%
mutate(date = as.Date(date)) %>%
filter(
date <= "2016-10-29" | date >= "2020-10-01" ## Change to october 4th
)
voting_percentages =
voting_percentages %>%
separate(date, c("year", "month", "day"), "-")
voting_percentages =
voting_percentages %>%
group_by(state_abbreviation, year) %>%
summarize(october_average = mean(hits)) %>%
pivot_wider(names_from = year,
values_from = october_average)
## `summarise()` regrouping output by 'state_abbreviation' (override with `.groups` argument)
voting_percentages =
voting_percentages %>%
rename(
october_2016 = "2016",
october_2020 = "2020"
) %>%
mutate(
percent_change = ((october_2020 - october_2016)/october_2016)*100
) %>%
mutate(
percent_change = round(percent_change)
)
Map 1: Name order
name_order$hover = with(name_order,
paste(state, '<br>',
'"Trump Biden" =', trump_biden,"%", '<br>',
'"Biden Trump" =', biden_trump,"%"))
g = list(
scope = 'usa',
projection = list(type = 'albers usa'),
showlakes = FALSE,
lake_color = toRGB('white')
)
map1 = plot_geo(name_order, locationmode = "USA-states", width=800, height=400)
map1 =
map1 %>%
add_trace(
z = ~trump_biden, text = ~hover, locations = ~state,
color = ~trump_biden, colors = 'Reds',
hoverinfo = "text"
)
map1 = map1 %>% colorbar(title = '%"Trump Biden"')
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
map1 = map1 %>% layout(
title = "Order of Presidential Candidates' Names in Google Searches<br>(Hover for breakdown)",
geo = g
)
map1
Map 2: Voting Searches
voting$hover = with(voting,
paste(state, '<br>',
'Relative Proportion =', vote_voting))
map2 = plot_geo(voting, locationmode = "USA-states", width=800, height=400)
map2 =
map2 %>%
add_trace(
z = ~vote_voting, text = ~hover, locations = ~state,
color = ~vote_voting, colors = 'Blues',
hoverinfo = "text"
)
map2 = map2 %>% colorbar(title = 'Relative Search Vol.')
map2 = map2 %>% layout(
title = 'Google Searches Containing "vote" or "voting"',
geo = g
)
map2
Map 3: % Change in Voting Searches
voting_percentages$hover = with(voting_percentages,
paste(state_abbreviation, '<br>',
'% Change =', percent_change,'%'))
map3 = plot_geo(voting_percentages, locationmode = "USA-states", width=800, height=400)
map3 =
map3 %>%
add_trace(
z = ~percent_change, text = ~hover, locations = ~state_abbreviation,
autocolorscale = TRUE,
hoverinfo = "text"
)
map3 = map3 %>% colorbar(title = '% Change')
map3 = map3 %>% layout(
title = 'Percent Change in Google Searches Containing "vote" or "voting"<br>October 2020 vs. October 2016',
geo = g
)
map3
#Sys.setenv('plotly_username'='xxxx')
#Sys.setenv('plotly_api_key'='xxxx')
#api_create(map1, filename = 'map1', sharing = 'public')
#api_create(map2, filename = 'map2v2', sharing = 'public')
#api_create(map3, filename = 'map3v2', sharing = 'public')
Correlation calculation
correlation = read_csv("./data/correlation.csv")
## Parsed with column specification:
## cols(
## state_abbreviation = col_character(),
## percent_change = col_double(),
## turnout_change = col_double()
## )
correlation =
correlation %>%
mutate(
percent_change = log(percent_change),
turnout_change = log(turnout_change)
)
cor.test(correlation$percent_change, correlation$turnout_change, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: correlation$percent_change and correlation$turnout_change
## t = -1.2099, df = 49, p-value = 0.2321
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4259150 0.1104447
## sample estimates:
## cor
## -0.1703231
correlation %>%
ggplot() +
geom_point(aes(x = percent_change, y = turnout_change)) +
labs(
title = "% Change in Google searches vs. % Change in voter turnout",
x = "% Change in Google searches, 2016-2020",
y = "% Change in voter turnout, 2016-2020"
) +
theme_minimal()

0.1703231*0.1703231
## [1] 0.02900996